# @author:Wei Junjie
# @time:2024/5/26 19:19
# @file baidu.py
# -*- coding: utf-8 -*-
"""
模块说明
"""
import json
import time
from urllib import request,parse
import requests
from bs4 import BeautifulSoup
from lxml import etree
from fake_useragent import UserAgent
import execjs
from selenium import webdriver
from selenium.webdriver.common.by import By

# Target site for both demos below.
BASE_URL = 'https://www.baidu.com/'
# Fixed desktop Chrome User-Agent sent with plain `requests` calls so the
# server returns the normal desktop page instead of a bot/blank response.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'




def get_html_demo1():
    """Search Baidu for a fixed query in headless Chrome and print the HTML.

    Opens the Baidu home page, submits a search, scrolls the results page in
    steps (to trigger any lazy loading), then pretty-prints the final page
    source. Side effects only (printing); returns None.

    The driver is always shut down in ``finally`` so no chromedriver/Chrome
    process is leaked, even when a step raises.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run without a visible browser window
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.implicitly_wait(10)
        driver.get(BASE_URL)
        driver.find_element(By.ID, 'kw').send_keys('三上悠亚')
        driver.find_element(By.ID, 'su').click()
        time.sleep(5)
        # Scroll down in 5%-of-page increments so lazily loaded results render.
        height = driver.execute_script('return document.body.scrollHeight')
        for step in range(20):
            driver.execute_script(f'scrollTo(0, {(height * step * 5 / 100)});')
        time.sleep(3)
        content = driver.page_source

        soup = BeautifulSoup(content, 'html.parser')
        print(soup.prettify())
        print()
    finally:
        # Release the browser and the chromedriver process (the original
        # leaked both on every call).
        driver.quit()


"""解析响应
    soup = BeautifulSoup(response.text,'html.parser')
    res=soup.find('input',id='kw')
    print(res.next_sibling)"""

def get_html_demo2(url):
    """Fetch *url* with requests and extract the Baidu News link text via XPath.

    Args:
        url: Page to request (typically ``BASE_URL``).

    Returns:
        list[str]: text nodes of anchors whose href is
        ``http://news.baidu.com`` (empty list if none found). The original
        printed the list but discarded it; returning it is backward
        compatible.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on network failure or timeout.
    """
    response = requests.get(url, headers={'User-Agent': USER_AGENT}, timeout=20)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    # Let requests sniff the real charset from the body; the declared header
    # charset is often wrong for Chinese pages.
    response.encoding = response.apparent_encoding
    print(response.text)
    # Parse the response and pull out the matching anchor text.
    html_obj = etree.HTML(response.text)
    x_list = html_obj.xpath("//a[@href='http://news.baidu.com']/text()")
    print(x_list)
    return x_list

# Script entry point: run the Selenium search demo when executed directly.
if __name__ == '__main__':
    get_html_demo1()



